{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RAG using SambaNova and Meta AI Llama-3 \n",
    "\n",
    "\n",
    "<img src=\"./resources/thumbnail.png\" width=800px>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import nest_asyncio\n",
    "import openai\n",
    "import qdrant_client\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "from llama_index.core import PromptTemplate\n",
    "from llama_index.core import Settings\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader\n",
    "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
    "from llama_index.llms.ollama import Ollama\n",
    "from llama_index.llms.sambanovasystems import SambaNovaCloud\n",
    "from llama_index.vector_stores.qdrant import QdrantVectorStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Patch asyncio to allow nested event loops (the notebook kernel already runs one)\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory holding the documents to index; add your files here (drag & drop works)\n",
    "input_dir_path = '/teamspace/studios/this_studio/test-dir'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the Qdrant collection used to store the document vectors\n",
    "collection_name = \"chat_with_docs\"\n",
    "\n",
    "# Client for the Qdrant server addressed at localhost:6333\n",
    "client = qdrant_client.QdrantClient(\n",
    "    host=\"localhost\",\n",
    "    port=6333,\n",
    ")\n",
    "\n",
    "\n",
    "def create_index(documents, show_progress=False):\n",
    "    \"\"\"Build a VectorStoreIndex over `documents` backed by the Qdrant collection.\n",
    "\n",
    "    Args:\n",
    "        documents: Documents to index, e.g. the result of\n",
    "            `SimpleDirectoryReader(...).load_data()`.\n",
    "        show_progress: Forwarded to `VectorStoreIndex.from_documents`;\n",
    "            defaults to False, which matches the previous behaviour.\n",
    "\n",
    "    Returns:\n",
    "        The index created by `VectorStoreIndex.from_documents`, using the\n",
    "        module-level `client` and `collection_name`.\n",
    "\n",
    "    Note:\n",
    "        Requires `StorageContext` to be imported from `llama_index.core`;\n",
    "        without that import this function raises NameError.\n",
    "    \"\"\"\n",
    "    qdrant_store = QdrantVectorStore(client=client, collection_name=collection_name)\n",
    "    # Route the index's vector storage to Qdrant instead of the default store\n",
    "    storage = StorageContext.from_defaults(vector_store=qdrant_store)\n",
    "    return VectorStoreIndex.from_documents(\n",
    "        documents,\n",
    "        storage_context=storage,\n",
    "        show_progress=show_progress,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7b4ba9e36b4e47b982be21b95b24a181",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bf2ebc67bf4a4caf8c6292b80f869b7c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8e41ff80db1a44a1ac3dc99fc477a819",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "17460d4930c241c8a7af9208b82d1310",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1418bcfbba844062a80299a82f04d21d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f73ccdc9f6be4b9e9e5d69d3de936ec1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "# Load variables from a local .env file (if one exists) into the environment.\n",
    "# presumably SambaNovaCloud reads its API key from the environment - confirm\n",
    "load_dotenv()\n",
    "\n",
    "# setup llm & embedding model\n",
    "llm = SambaNovaCloud(\n",
    "    model=\"Meta-Llama-3.3-70B-Instruct\",\n",
    "    temperature=0.7,\n",
    "    top_p=0.01,\n",
    ")\n",
    "\n",
    "# NOTE(review): trust_remote_code=True permits running code shipped with the\n",
    "# model repository; confirm this model actually needs it.\n",
    "embed_model = HuggingFaceEmbedding(\n",
    "    model_name=\"BAAI/bge-large-en-v1.5\",\n",
    "    trust_remote_code=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9f486b6a1da4f15bb0e43469fa8c420",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Parsing nodes:   0%|          | 0/17 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "363a055481fb4d808da9551727ee5307",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating embeddings:   0%|          | 0/26 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load every PDF found (recursively) under the input directory\n",
    "loader = SimpleDirectoryReader(\n",
    "    input_dir=input_dir_path,\n",
    "    required_exts=[\".pdf\"],\n",
    "    recursive=True,\n",
    ")\n",
    "docs = loader.load_data()\n",
    "\n",
    "# Register the embedding model globally before any index is built\n",
    "Settings.embed_model = embed_model\n",
    "\n",
    "# Prefer the Qdrant-backed index. If building it fails, report why and fall\n",
    "# back to an index built without the Qdrant storage context, so the failure\n",
    "# is visible instead of being silently swallowed.\n",
    "try:\n",
    "    index = create_index(docs)\n",
    "    print('Using Qdrant collection')\n",
    "except Exception as err:\n",
    "    print(f'Qdrant index unavailable ({err!r}); falling back to the default index')\n",
    "    index = VectorStoreIndex.from_documents(docs, show_progress=True)\n",
    "\n",
    "# Create the query engine with the configured LLM (default settings, no reranker)\n",
    "Settings.llm = llm\n",
    "query_engine = index.as_query_engine()\n",
    "\n",
    "# ====== Customise prompt template ======\n",
    "qa_prompt_tmpl_str = (\n",
    "\"Context information is below.\\n\"\n",
    "\"---------------------\\n\"\n",
    "\"{context_str}\\n\"\n",
    "\"---------------------\\n\"\n",
    "\"Given the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don't know the answer say 'I don't know!'.\\n\"\n",
    "\"Query: {query_str}\\n\"\n",
    "\"Answer: \"\n",
    ")\n",
    "qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)\n",
    "\n",
    "# Replace the response synthesizer's default QA prompt with the custom one\n",
    "query_engine.update_prompts(\n",
    "    {\"response_synthesizer:text_qa_template\": qa_prompt_tmpl}\n",
    ")\n",
    "\n",
    "# Generate the response\n",
    "response = query_engine.query(\"What exactly is DSPy?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "DSPy is a framework for programmatically solving advanced tasks with language and retrieval models through composing and declaring modules. It aims to replace brittle \"prompt engineering\" tricks with composable modules and automatic optimizers, allowing developers to define signatures that specify what a language model (LM) needs to do declaratively."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(str(response)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ❗️❗️ To use Streamlit from here, first free the GPU memory by clicking the Restart button above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sat Dec  7 08:31:49 2024       \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 535.216.03             Driver Version: 535.216.03   CUDA Version: 12.2     |\n",
      "|-----------------------------------------+----------------------+----------------------+\n",
      "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
      "|                                         |                      |               MIG M. |\n",
      "|=========================================+======================+======================|\n",
      "|   0  NVIDIA L4                      Off | 00000000:35:00.0 Off |                    0 |\n",
      "| N/A   36C    P0              31W /  72W |  19895MiB / 23034MiB |      0%      Default |\n",
      "|                                         |                      |                  N/A |\n",
      "+-----------------------------------------+----------------------+----------------------+\n",
      "                                                                                         \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| Processes:                                                                            |\n",
      "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
      "|        ID   ID                                                             Usage      |\n",
      "|=======================================================================================|\n",
      "+---------------------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "# check GPU usage\n",
    "\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: llama-index-vector-stores-qdrant in /opt/anaconda3/lib/python3.12/site-packages (0.4.0)\n",
      "Requirement already satisfied: grpcio<2.0.0,>=1.60.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-vector-stores-qdrant) (1.68.1)\n",
      "Requirement already satisfied: llama-index-core<0.13.0,>=0.12.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-vector-stores-qdrant) (0.12.8)\n",
      "Requirement already satisfied: qdrant-client>=1.7.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-vector-stores-qdrant) (1.12.1)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy>=1.4.49 in /opt/anaconda3/lib/python3.12/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2.0.34)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.10.5)\n",
      "Requirement already satisfied: dataclasses-json in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (0.6.7)\n",
      "Requirement already satisfied: deprecated>=1.2.9.3 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.2.15)\n",
      "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.0.8)\n",
      "Requirement already satisfied: filetype<2.0.0,>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.2.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2024.6.1)\n",
      "Requirement already satisfied: httpx in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (0.27.0)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.6.0)\n",
      "Requirement already satisfied: networkx>=3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.3)\n",
      "Requirement already satisfied: nltk>3.8.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.9.1)\n",
      "Requirement already satisfied: numpy in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.26.4)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (10.4.0)\n",
      "Requirement already satisfied: pydantic>=2.8.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2.9.2)\n",
      "Requirement already satisfied: requests>=2.31.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2.32.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (8.5.0)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (0.7.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (4.66.5)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (4.12.2)\n",
      "Requirement already satisfied: typing-inspect>=0.8.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (0.9.0)\n",
      "Requirement already satisfied: wrapt in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.14.1)\n",
      "Requirement already satisfied: grpcio-tools>=1.41.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (1.68.0)\n",
      "Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (2.10.1)\n",
      "Requirement already satisfied: urllib3<3,>=1.26.14 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (1.26.20)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.2.0)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (24.3.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.4.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (6.0.4)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.11.0)\n",
      "Requirement already satisfied: protobuf<6.0dev,>=5.26.1 in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (5.28.3)\n",
      "Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (75.1.0)\n",
      "Requirement already satisfied: anyio in /opt/anaconda3/lib/python3.12/site-packages (from httpx->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (4.2.0)\n",
      "Requirement already satisfied: certifi in /opt/anaconda3/lib/python3.12/site-packages (from httpx->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2024.12.14)\n",
      "Requirement already satisfied: httpcore==1.* in /opt/anaconda3/lib/python3.12/site-packages (from httpx->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.0.2)\n",
      "Requirement already satisfied: idna in /opt/anaconda3/lib/python3.12/site-packages (from httpx->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.7)\n",
      "Requirement already satisfied: sniffio in /opt/anaconda3/lib/python3.12/site-packages (from httpx->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from httpcore==1.*->httpx->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (0.14.0)\n",
      "Requirement already satisfied: h2<5,>=3 in /opt/anaconda3/lib/python3.12/site-packages (from httpx[http2]>=0.20.0->qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (4.1.0)\n",
      "Requirement already satisfied: click in /opt/anaconda3/lib/python3.12/site-packages (from nltk>3.8.1->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (8.1.7)\n",
      "Requirement already satisfied: joblib in /opt/anaconda3/lib/python3.12/site-packages (from nltk>3.8.1->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.4.2)\n",
      "Requirement already satisfied: regex>=2021.8.3 in /opt/anaconda3/lib/python3.12/site-packages (from nltk>3.8.1->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2024.9.11)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic>=2.8.0->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic>=2.8.0->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (2.23.4)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from requests>=2.31.0->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.3.2)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /opt/anaconda3/lib/python3.12/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.0.1)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (1.0.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (3.23.1)\n",
      "Requirement already satisfied: hyperframe<7,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (6.0.1)\n",
      "Requirement already satisfied: hpack<5,>=4.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client>=1.7.1->llama-index-vector-stores-qdrant) (4.0.0)\n",
      "Requirement already satisfied: packaging>=17.0 in /opt/anaconda3/lib/python3.12/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.13.0,>=0.12.0->llama-index-vector-stores-qdrant) (23.2)\n",
      "Requirement already satisfied: llama-index in /opt/anaconda3/lib/python3.12/site-packages (0.12.1)\n",
      "Requirement already satisfied: qdrant_client in /opt/anaconda3/lib/python3.12/site-packages (1.12.1)\n",
      "Requirement already satisfied: llama-index-agent-openai<0.5.0,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.4.0)\n",
      "Requirement already satisfied: llama-index-cli<0.5.0,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.4.0)\n",
      "Requirement already satisfied: llama-index-core<0.13.0,>=0.12.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.12.8)\n",
      "Requirement already satisfied: llama-index-embeddings-openai<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.3.0)\n",
      "Requirement already satisfied: llama-index-indices-managed-llama-cloud>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.6.2)\n",
      "Requirement already satisfied: llama-index-legacy<0.10.0,>=0.9.48 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.9.48.post4)\n",
      "Requirement already satisfied: llama-index-llms-openai<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.3.12)\n",
      "Requirement already satisfied: llama-index-multi-modal-llms-openai<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.3.0)\n",
      "Requirement already satisfied: llama-index-program-openai<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.3.1)\n",
      "Requirement already satisfied: llama-index-question-gen-openai<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.3.0)\n",
      "Requirement already satisfied: llama-index-readers-file<0.5.0,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.4.0)\n",
      "Requirement already satisfied: llama-index-readers-llama-parse>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (0.4.0)\n",
      "Requirement already satisfied: nltk>3.8.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index) (3.9.1)\n",
      "Requirement already satisfied: grpcio>=1.41.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant_client) (1.68.1)\n",
      "Requirement already satisfied: grpcio-tools>=1.41.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant_client) (1.68.0)\n",
      "Requirement already satisfied: httpx>=0.20.0 in /opt/anaconda3/lib/python3.12/site-packages (from httpx[http2]>=0.20.0->qdrant_client) (0.27.0)\n",
      "Requirement already satisfied: numpy>=1.26 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant_client) (1.26.4)\n",
      "Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant_client) (2.10.1)\n",
      "Requirement already satisfied: pydantic>=1.10.8 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant_client) (2.9.2)\n",
      "Requirement already satisfied: urllib3<3,>=1.26.14 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant_client) (1.26.20)\n",
      "Requirement already satisfied: protobuf<6.0dev,>=5.26.1 in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant_client) (5.28.3)\n",
      "Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant_client) (75.1.0)\n",
      "Requirement already satisfied: anyio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant_client) (4.2.0)\n",
      "Requirement already satisfied: certifi in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant_client) (2024.12.14)\n",
      "Requirement already satisfied: httpcore==1.* in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant_client) (1.0.2)\n",
      "Requirement already satisfied: idna in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant_client) (3.7)\n",
      "Requirement already satisfied: sniffio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant_client) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant_client) (0.14.0)\n",
      "Requirement already satisfied: h2<5,>=3 in /opt/anaconda3/lib/python3.12/site-packages (from httpx[http2]>=0.20.0->qdrant_client) (4.1.0)\n",
      "Requirement already satisfied: openai>=1.14.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-agent-openai<0.5.0,>=0.4.0->llama-index) (1.58.1)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy>=1.4.49 in /opt/anaconda3/lib/python3.12/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.13.0,>=0.12.1->llama-index) (2.0.34)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (3.10.5)\n",
      "Requirement already satisfied: dataclasses-json in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (0.6.7)\n",
      "Requirement already satisfied: deprecated>=1.2.9.3 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (1.2.15)\n",
      "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (1.0.8)\n",
      "Requirement already satisfied: filetype<2.0.0,>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (1.2.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (2024.6.1)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (1.6.0)\n",
      "Requirement already satisfied: networkx>=3.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (3.3)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (10.4.0)\n",
      "Requirement already satisfied: requests>=2.31.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (2.32.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (8.5.0)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (0.7.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (4.66.5)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (4.12.2)\n",
      "Requirement already satisfied: typing-inspect>=0.8.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (0.9.0)\n",
      "Requirement already satisfied: wrapt in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-core<0.13.0,>=0.12.1->llama-index) (1.14.1)\n",
      "Requirement already satisfied: llama-cloud>=0.1.5 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-indices-managed-llama-cloud>=0.4.0->llama-index) (0.1.5)\n",
      "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2.2.2)\n",
      "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index) (4.12.3)\n",
      "Requirement already satisfied: pypdf<6.0.0,>=5.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index) (5.1.0)\n",
      "Requirement already satisfied: striprtf<0.0.27,>=0.0.26 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-readers-file<0.5.0,>=0.4.0->llama-index) (0.0.26)\n",
      "Requirement already satisfied: llama-parse>=0.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from llama-index-readers-llama-parse>=0.4.0->llama-index) (0.5.15)\n",
      "Requirement already satisfied: click in /opt/anaconda3/lib/python3.12/site-packages (from nltk>3.8.1->llama-index) (8.1.7)\n",
      "Requirement already satisfied: joblib in /opt/anaconda3/lib/python3.12/site-packages (from nltk>3.8.1->llama-index) (1.4.2)\n",
      "Requirement already satisfied: regex>=2021.8.3 in /opt/anaconda3/lib/python3.12/site-packages (from nltk>3.8.1->llama-index) (2024.9.11)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic>=1.10.8->qdrant_client) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic>=1.10.8->qdrant_client) (2.23.4)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.1->llama-index) (2.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.1->llama-index) (1.2.0)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.1->llama-index) (24.3.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.1->llama-index) (1.4.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.1->llama-index) (6.0.4)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.13.0,>=0.12.1->llama-index) (1.11.0)\n",
      "Requirement already satisfied: soupsieve>1.2 in /opt/anaconda3/lib/python3.12/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.5.0,>=0.4.0->llama-index) (2.5)\n",
      "Requirement already satisfied: hyperframe<7,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant_client) (6.0.1)\n",
      "Requirement already satisfied: hpack<5,>=4.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant_client) (4.0.0)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index) (1.9.0)\n",
      "Requirement already satisfied: jiter<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai>=1.14.0->llama-index-agent-openai<0.5.0,>=0.4.0->llama-index) (0.6.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from requests>=2.31.0->llama-index-core<0.13.0,>=0.12.1->llama-index) (3.3.2)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /opt/anaconda3/lib/python3.12/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.13.0,>=0.12.1->llama-index) (3.0.1)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typing-inspect>=0.8.0->llama-index-core<0.13.0,>=0.12.1->llama-index) (1.0.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json->llama-index-core<0.13.0,>=0.12.1->llama-index) (3.23.1)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2023.3)\n",
      "Requirement already satisfied: packaging>=17.0 in /opt/anaconda3/lib/python3.12/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.13.0,>=0.12.1->llama-index) (23.2)\n",
      "Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install llama-index-vector-stores-qdrant\n",
    "!pip install llama-index qdrant_client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'ff3c55a0-943b-438d-9f43-92db4192f433'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "from llama_index.llms.sambanovasystems import SambaNovaCloud\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "os.getenv(\"SAMBANOVA_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = SambaNovaCloud(model=\"Meta-Llama-3.3-70B-Instruct\",\n",
    "                     temperature=0.7,\n",
    "                     top_p=0.01,\n",
    "                    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from llama_index.core.base.llms.types import (\n",
    "    ChatMessage,\n",
    "    MessageRole,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "assistant: J'adore la programmation.\n"
     ]
    }
   ],
   "source": [
    "system_msg = ChatMessage(\n",
    "    role=MessageRole.SYSTEM,\n",
    "    content=\"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
    ")\n",
    "user_msg = ChatMessage(role=MessageRole.USER, content=\"I love programming.\")\n",
    "\n",
    "messages = [\n",
    "    system_msg,\n",
    "    user_msg,\n",
    "]\n",
    "\n",
    "response = llm.chat(messages)\n",
    "print(response.message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rag_code import *\n",
    "from llama_index.core import SimpleDirectoryReader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size=32\n",
    "loader = SimpleDirectoryReader(input_dir = \"./docs/\",\n",
    "                               required_exts=[\".pdf\"],\n",
    "                               recursive=True)\n",
    "\n",
    "docs = loader.load_data()\n",
    "documents = [doc.text for doc in docs]\n",
    "\n",
    "# embed data    \n",
    "\n",
    "embeddata = EmbedData(embed_model_name=\"BAAI/bge-large-en-v1.5\", batch_size=batch_size)\n",
    "embeddata.embed(documents)\n",
    "\n",
    "# set up vector database\n",
    "qdrant_vdb = QdrantVDB_QB(collection_name=\"chat-with-docs2\",\n",
    "                          batch_size=batch_size,\n",
    "                          vector_dim=1024)\n",
    "\n",
    "qdrant_vdb.define_client()\n",
    "qdrant_vdb.create_collection()\n",
    "qdrant_vdb.ingest_data(embeddata=embeddata)\n",
    "\n",
    "# set up retriever\n",
    "retriever = Retriever(vector_db=qdrant_vdb, embeddata=embeddata)\n",
    "\n",
    "# set up rag\n",
    "query_engine = RAG(retriever=retriever, llm_name=\"Meta-Llama-3.3-70B-Instruct\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "messages = query_engine.query(\"What is DSPy?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ChatMessage(role=<MessageRole.SYSTEM: 'system'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text=\"You are a helpful assistant that answers questions about the user's document.\")]),\n",
       " ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Context information is below.\\n---------------------\\nPreprint\\n3.1 N ATURAL LANGUAGE SIGNATURES CAN ABSTRACT PROMPTING & FINETUNING\\nInstead of free-form string prompts, DSPy programs use natural language signatures to assign work\\nto the LM. A DSPy signature isnatural-language typed declaration of a function: a short declarative\\nspec that tells DSPy what a text transformation needs to do (e.g., “consume questions and return\\nanswers”), rather than how a specific LM should be prompted to implement that behavior. More\\nformally, a DSPy signature is a tuple of input fields and output fields (and an optional instruction).\\nA field consists offield name and optional metadata.4 In typical usage, the roles of fields are inferred\\nby DSPy as a function of field names. For instance, the DSPy compiler will use in-context learning\\nto interpret questiondifferently from answer and will iteratively refine its usage of these fields.\\nSignatures offer two benefits over prompts: they can be compiled into self-improving and pipeline-\\nadaptive prompts or finetunes. This is primarily done by bootstrapping (Sec 4) useful demonstrating\\nexamples for each signature. 
Additionally, they handle structured formatting and parsing logic to\\nreduce (or, ideally, avoid) brittle string manipulation in user programs.\\nIn practice, DSPy signatures can be expressed with a shorthand notation likequestion -> answer,\\nso that line 1 in the following is a complete DSPy program for a basic question-answering system\\n(with line 2 illustrating usage and line 3 the response when GPT-3.5 is the LM):\\n1 qa = dspy.Predict(\"question -> answer\")\\n2 qa(question=\"Where is Guaran ´ı spoken?\")\\n3 # Out: Prediction(answer=’Guaran ´ı is spoken mainly in South America.’)\\nIn the shorthand notation, each field’s name indicates the semantic role that the input (or output)\\nfield plays in the transformation. DSPy will parse this notation and expand the field names into\\nmeaningful instructions for the LM, so that english document -> french translation would\\nprompt for English to French translation. When needed, DSPy offers more advanced programming\\ninterfaces for expressing more explicit constraints on signatures (Appendix A).\\n3.2 P ARAMETERIZED & TEMPLATED MODULES CAN ABSTRACT PROMPTING TECHNIQUES\\nAkin to type signatures in programming languages, DSPy signatures simply define an interface and\\nprovide type-like hints on the expected behavior. To use a signature, we must declare amodule with\\nthat signature, like we instantiated a Predict module above. A module declaration like this returns\\na function having that signature.\\nThe Predict Module The core module for working with signatures in DSPy isPredict(simplified\\npseudocode in Appendix D.1). Internally, Predict stores the supplied signature, an optional LM to\\nuse (initially None, but otherwise overrides the default LM for this module), and a list of demon-\\nstrations for prompting (initially empty). 
Like layers in PyTorch, the instantiated module behaves as\\na callable function: it takes in keyword arguments corresponding to the signature input fields (e.g.,\\nquestion), formats a prompt to implement the signature and includes the appropriate demonstra-\\ntions, calls the LM, and parses the output fields. When Predict detects it’s being used in compile\\nmode, it will also internally track input/output traces to assist the teleprompter at bootstrapping the\\ndemonstrations.\\nOther Built-in ModulesDSPy modules translate prompting techniques into modular functions that\\nsupport any signature, contrasting with the standard approach of prompting LMs with task-specific\\ndetails (e.g., hand-written few-shot examples). To this end, DSPy includes a number of more sophis-\\nticated modules like ChainOfThought, ProgramOfThought, MultiChainComparison, and ReAct.5\\nThese can all be used interchangeably to implement a DSPy signature. For instance, simply chang-\\n4String descriptions of the task and the fields are also optional and usually omitted. Fields can carry optional\\nfield prefix and description. By default, fields are assumed to hold free-form strings; we are actively exploring\\noptional data type as a way to specify constraints on valid values (e.g.,boolor int) and more gracefully handle\\nformatting and parsing logic, though this feature is not core to DSPy at the time of writing.\\n5These modules generalize prompting techniques from the literature, respectively, by Wei et al. (2022),\\nChen et al. (2022), Yoran et al. (2023), and Yao et al. (2022) and, in doing so, generalize the ideas on zero-shot\\nprompting and rationale self-generation from Kojima et al. (2022), Zelikman et al. (2022), Zhang et al. (2022),\\nand Huang et al. 
(2022) to parameterized modules that can bootstrap arbitrary multi-stage pipelines.\\n4\\n\\n---\\n\\nPreprint\\n3.1 N ATURAL LANGUAGE SIGNATURES CAN ABSTRACT PROMPTING & FINETUNING\\nInstead of free-form string prompts, DSPy programs use natural language signatures to assign work\\nto the LM. A DSPy signature isnatural-language typed declaration of a function: a short declarative\\nspec that tells DSPy what a text transformation needs to do (e.g., “consume questions and return\\nanswers”), rather than how a specific LM should be prompted to implement that behavior. More\\nformally, a DSPy signature is a tuple of input fields and output fields (and an optional instruction).\\nA field consists offield name and optional metadata.4 In typical usage, the roles of fields are inferred\\nby DSPy as a function of field names. For instance, the DSPy compiler will use in-context learning\\nto interpret questiondifferently from answer and will iteratively refine its usage of these fields.\\nSignatures offer two benefits over prompts: they can be compiled into self-improving and pipeline-\\nadaptive prompts or finetunes. This is primarily done by bootstrapping (Sec 4) useful demonstrating\\nexamples for each signature. Additionally, they handle structured formatting and parsing logic to\\nreduce (or, ideally, avoid) brittle string manipulation in user programs.\\nIn practice, DSPy signatures can be expressed with a shorthand notation likequestion -> answer,\\nso that line 1 in the following is a complete DSPy program for a basic question-answering system\\n(with line 2 illustrating usage and line 3 the response when GPT-3.5 is the LM):\\n1 qa = dspy.Predict(\"question -> answer\")\\n2 qa(question=\"Where is Guaran ´ı spoken?\")\\n3 # Out: Prediction(answer=’Guaran ´ı is spoken mainly in South America.’)\\nIn the shorthand notation, each field’s name indicates the semantic role that the input (or output)\\nfield plays in the transformation. 
DSPy will parse this notation and expand the field names into\\nmeaningful instructions for the LM, so that english document -> french translation would\\nprompt for English to French translation. When needed, DSPy offers more advanced programming\\ninterfaces for expressing more explicit constraints on signatures (Appendix A).\\n3.2 P ARAMETERIZED & TEMPLATED MODULES CAN ABSTRACT PROMPTING TECHNIQUES\\nAkin to type signatures in programming languages, DSPy signatures simply define an interface and\\nprovide type-like hints on the expected behavior. To use a signature, we must declare amodule with\\nthat signature, like we instantiated a Predict module above. A module declaration like this returns\\na function having that signature.\\nThe Predict Module The core module for working with signatures in DSPy isPredict(simplified\\npseudocode in Appendix D.1). Internally, Predict stores the supplied signature, an optional LM to\\nuse (initially None, but otherwise overrides the default LM for this module), and a list of demon-\\nstrations for prompting (initially empty). Like layers in PyTorch, the instantiated module behaves as\\na callable function: it takes in keyword arguments corresponding to the signature input fields (e.g.,\\nquestion), formats a prompt to implement the signature and includes the appropriate demonstra-\\ntions, calls the LM, and parses the output fields. When Predict detects it’s being used in compile\\nmode, it will also internally track input/output traces to assist the teleprompter at bootstrapping the\\ndemonstrations.\\nOther Built-in ModulesDSPy modules translate prompting techniques into modular functions that\\nsupport any signature, contrasting with the standard approach of prompting LMs with task-specific\\ndetails (e.g., hand-written few-shot examples). 
To this end, DSPy includes a number of more sophis-\\nticated modules like ChainOfThought, ProgramOfThought, MultiChainComparison, and ReAct.5\\nThese can all be used interchangeably to implement a DSPy signature. For instance, simply chang-\\n4String descriptions of the task and the fields are also optional and usually omitted. Fields can carry optional\\nfield prefix and description. By default, fields are assumed to hold free-form strings; we are actively exploring\\noptional data type as a way to specify constraints on valid values (e.g.,boolor int) and more gracefully handle\\nformatting and parsing logic, though this feature is not core to DSPy at the time of writing.\\n5These modules generalize prompting techniques from the literature, respectively, by Wei et al. (2022),\\nChen et al. (2022), Yoran et al. (2023), and Yao et al. (2022) and, in doing so, generalize the ideas on zero-shot\\nprompting and rationale self-generation from Kojima et al. (2022), Zelikman et al. (2022), Zhang et al. (2022),\\nand Huang et al. (2022) to parameterized modules that can bootstrap arbitrary multi-stage pipelines.\\n4\\n---------------------\\nGiven the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don\\'t know the answer say \\'I don\\'t know!\\'.\\nQuery: What is DSPy?\\nAnswer: ')])]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "messages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<generator object llm_completion_callback.<locals>.wrap.<locals>.wrapped_llm_predict.<locals>.wrapped_gen at 0x35b122e60>"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# system_msg = ChatMessage(\n",
    "#     role=MessageRole.SYSTEM,\n",
    "#     content=\"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
    "# )\n",
    "\n",
    "\n",
    "# messages_new = [\n",
    "#     system_msg,\n",
    "#     user_msg,\n",
    "# ]\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Programming can be a fascinating and rewarding \n",
      "field. There's \n",
      "something satisfying \n",
      "about \n",
      "bringing your ideas to \n",
      "life \n",
      "with code, \n",
      "and \n",
      "the \n",
      "constant \n",
      "learning \n",
      "and \n",
      "problem-solving can be really \n",
      "engaging.\n",
      "\n",
      "What kind of programming do you enjoy most? Are \n",
      "you into web development, mobile app \n",
      "development, game development, or something \n",
      "else? Do you have a favorite programming language \n",
      "or \n",
      "\n",
      "technology stack?\n",
      "\n"
     ]
    }
   ],
   "source": [
    "full_response = \"\"\n",
    "        \n",
    "user_msg = ChatMessage(role=MessageRole.USER, content=\"I love programming.\")\n",
    "\n",
    "# Simulate stream of response with milliseconds delay\n",
    "streaming_response = query_engine.llm.stream_complete(user_msg.content)\n",
    "\n",
    "for chunk in streaming_response:\n",
    "    try:\n",
    "        new_text = chunk.raw[\"choices\"][0][\"delta\"][\"content\"]\n",
    "        full_response += new_text\n",
    "        print(new_text)\n",
    "    except:\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'to interact '"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ai_stream_msgs[7].raw[\"choices\"][0][\"delta\"][\"content\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'message': ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact ')]),\n",
       " 'raw': {'choices': [{'delta': {'content': 'to interact ',\n",
       "     'role': 'assistant'},\n",
       "    'finish_reason': None,\n",
       "    'index': 0,\n",
       "    'logprobs': None}],\n",
       "  'created': 1735926260,\n",
       "  'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00',\n",
       "  'model': 'Meta-Llama-3.3-70B-Instruct',\n",
       "  'object': 'chat.completion.chunk',\n",
       "  'system_fingerprint': 'fastcoe'},\n",
       " 'delta': 'to interact ',\n",
       " 'logprobs': None,\n",
       " 'additional_kwargs': {}}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ai_stream_msgs[7])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='')]), raw={'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='')]), raw={'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be ')]), raw={'choices': [{'delta': {'content': 'DSPy appears to be ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='DSPy appears to be ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework ')]), raw={'choices': [{'delta': {'content': 'a programming framework ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='a programming framework ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or ')]), raw={'choices': [{'delta': {'content': 'or ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='or ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system ')]), raw={'choices': [{'delta': {'content': 'system ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='system ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures ')]), raw={'choices': [{'delta': {'content': 'that utilizes natural language signatures ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='that utilizes natural language signatures ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact ')]), raw={'choices': [{'delta': {'content': 'to interact ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='to interact ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language ')]), raw={'choices': [{'delta': {'content': 'with Language ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='with Language ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language ')]), raw={'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models ')]), raw={'choices': [{'delta': {'content': 'Models ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='Models ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for ')]), raw={'choices': [{'delta': {'content': '(LMs), allowing for ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='(LMs), allowing for ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible ')]), raw={'choices': [{'delta': {'content': 'more abstract and flexible ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='more abstract and flexible ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of ')]), raw={'choices': [{'delta': {'content': 'prompting and fine-tuning of ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='prompting and fine-tuning of ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these ')]), raw={'choices': [{'delta': {'content': 'these ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='these ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. ')]), raw={'choices': [{'delta': {'content': 'models. ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='models. ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to ')]), raw={'choices': [{'delta': {'content': 'It provides a way to ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='It provides a way to ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces ')]), raw={'choices': [{'delta': {'content': 'define interfaces ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='define interfaces ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors ')]), raw={'choices': [{'delta': {'content': 'and behaviors ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='and behaviors ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for ')]), raw={'choices': [{'delta': {'content': 'for ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='for ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text ')]), raw={'choices': [{'delta': {'content': 'text ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='text ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, ')]), raw={'choices': [{'delta': {'content': 'transformations, ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='transformations, ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and ')]), raw={'choices': [{'delta': {'content': 'and ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='and ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various ')]), raw={'choices': [{'delta': {'content': 'includes various ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='includes various ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules ')]), raw={'choices': [{'delta': {'content': 'modules ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='modules ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to ')]), raw={'choices': [{'delta': {'content': 'that can be used to ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='that can be used to ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to implement ')]), raw={'choices': [{'delta': {'content': 'implement ', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='implement ', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to implement ')]), raw={'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to implement these transformations.')]), raw={'choices': [{'delta': {'content': 'these transformations.', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='these transformations.', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': 'stop'}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to implement these transformations.')]), raw={'choices': [{'delta': {'content': ''}, 'finish_reason': 'stop', 'index': 0, 'logprobs': None}], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe'}, delta='', logprobs=None, additional_kwargs={}),\n",
       " ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None, 'usage': {'completion_tokens': 65, 'completion_tokens_after_first_per_sec': 198.08293442800777, 'completion_tokens_after_first_per_sec_first_ten': 200.5613091844793, 'completion_tokens_per_sec': 66.11378040544227, 'end_time': 1735926261.7462182, 'is_last_response': True, 'prompt_tokens': 2155, 'start_time': 1735926260.7630646, 'time_to_first_token': 0.6600565910339355, 'total_latency': 0.9831535816192627, 'total_tokens': 2220, 'total_tokens_per_sec': 2258.0398846166436}, 'model_name': 'Meta-Llama-3.3-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1735926260}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to implement these transformations.')]), raw={'choices': [], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe', 'usage': {'completion_tokens': 65, 'completion_tokens_after_first_per_sec': 198.08293442800777, 'completion_tokens_after_first_per_sec_first_ten': 200.5613091844793, 'completion_tokens_per_sec': 66.11378040544227, 'end_time': 1735926261.7462182, 'is_last_response': True, 'prompt_tokens': 2155, 'start_time': 1735926260.7630646, 'time_to_first_token': 0.6600565910339355, 'total_latency': 0.9831535816192627, 'total_tokens': 2220, 'total_tokens_per_sec': 2258.0398846166436}}, delta='', logprobs=None, additional_kwargs={})]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_response = \"\"\n",
    "        \n",
    "# Simulate stream of response with milliseconds delay\n",
    "streaming_response = query_engine.llm.stream_chat(messages)\n",
    "        \n",
    "for chunk in streaming_response:\n",
    "    full_response += chunk\n",
    "    message_placeholder.markdown(full_response + \"▌\")\n",
    "\n",
    "ai_stream_msgs = []\n",
    "for stream in streaming_response:\n",
    "    ai_stream_msgs.append(stream)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'finish_reason': None, 'usage': {'completion_tokens': 65, 'completion_tokens_after_first_per_sec': 198.08293442800777, 'completion_tokens_after_first_per_sec_first_ten': 200.5613091844793, 'completion_tokens_per_sec': 66.11378040544227, 'end_time': 1735926261.7462182, 'is_last_response': True, 'prompt_tokens': 2155, 'start_time': 1735926260.7630646, 'time_to_first_token': 0.6600565910339355, 'total_latency': 0.9831535816192627, 'total_tokens': 2220, 'total_tokens_per_sec': 2258.0398846166436}, 'model_name': 'Meta-Llama-3.3-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1735926260}, blocks=[TextBlock(block_type='text', text='DSPy appears to be a programming framework or system that utilizes natural language signatures to interact with Language Models (LMs), allowing for more abstract and flexible prompting and fine-tuning of these models. It provides a way to define interfaces and behaviors for text transformations, and includes various modules that can be used to implement these transformations.')]), raw={'choices': [], 'created': 1735926260, 'id': '942f9445-47bf-48c0-b3ed-f33b7fd7bd00', 'model': 'Meta-Llama-3.3-70B-Instruct', 'object': 'chat.completion.chunk', 'system_fingerprint': 'fastcoe', 'usage': {'completion_tokens': 65, 'completion_tokens_after_first_per_sec': 198.08293442800777, 'completion_tokens_after_first_per_sec_first_ten': 200.5613091844793, 'completion_tokens_per_sec': 66.11378040544227, 'end_time': 1735926261.7462182, 'is_last_response': True, 'prompt_tokens': 2155, 'start_time': 1735926260.7630646, 'time_to_first_token': 0.6600565910339355, 'total_latency': 0.9831535816192627, 'total_tokens': 2220, 'total_tokens_per_sec': 2258.0398846166436}}, delta='', logprobs=None, additional_kwargs={})"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ai_stream_msgs[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Context information is below.\n",
      "---------------------\n",
      "Preprint\n",
      "3.1 N ATURAL LANGUAGE SIGNATURES CAN ABSTRACT PROMPTING & FINETUNING\n",
      "Instead of free-form string prompts, DSPy programs use natural language signatures to assign work\n",
      "to the LM. A DSPy signature isnatural-language typed declaration of a function: a short declarative\n",
      "spec that tells DSPy what a text transformation needs to do (e.g., “consume questions and return\n",
      "answers”), rather than how a specific LM should be prompted to implement that behavior. More\n",
      "formally, a DSPy signature is a tuple of input fields and output fields (and an optional instruction).\n",
      "A field consists offield name and optional metadata.4 In typical usage, the roles of fields are inferred\n",
      "by DSPy as a function of field names. For instance, the DSPy compiler will use in-context learning\n",
      "to interpret questiondifferently from answer and will iteratively refine its usage of these fields.\n",
      "Signatures offer two benefits over prompts: they can be compiled into self-improving and pipeline-\n",
      "adaptive prompts or finetunes. This is primarily done by bootstrapping (Sec 4) useful demonstrating\n",
      "examples for each signature. Additionally, they handle structured formatting and parsing logic to\n",
      "reduce (or, ideally, avoid) brittle string manipulation in user programs.\n",
      "In practice, DSPy signatures can be expressed with a shorthand notation likequestion -> answer,\n",
      "so that line 1 in the following is a complete DSPy program for a basic question-answering system\n",
      "(with line 2 illustrating usage and line 3 the response when GPT-3.5 is the LM):\n",
      "1 qa = dspy.Predict(\"question -> answer\")\n",
      "2 qa(question=\"Where is Guaran ´ı spoken?\")\n",
      "3 # Out: Prediction(answer=’Guaran ´ı is spoken mainly in South America.’)\n",
      "In the shorthand notation, each field’s name indicates the semantic role that the input (or output)\n",
      "field plays in the transformation. DSPy will parse this notation and expand the field names into\n",
      "meaningful instructions for the LM, so that english document -> french translation would\n",
      "prompt for English to French translation. When needed, DSPy offers more advanced programming\n",
      "interfaces for expressing more explicit constraints on signatures (Appendix A).\n",
      "3.2 P ARAMETERIZED & TEMPLATED MODULES CAN ABSTRACT PROMPTING TECHNIQUES\n",
      "Akin to type signatures in programming languages, DSPy signatures simply define an interface and\n",
      "provide type-like hints on the expected behavior. To use a signature, we must declare amodule with\n",
      "that signature, like we instantiated a Predict module above. A module declaration like this returns\n",
      "a function having that signature.\n",
      "The Predict Module The core module for working with signatures in DSPy isPredict(simplified\n",
      "pseudocode in Appendix D.1). Internally, Predict stores the supplied signature, an optional LM to\n",
      "use (initially None, but otherwise overrides the default LM for this module), and a list of demon-\n",
      "strations for prompting (initially empty). Like layers in PyTorch, the instantiated module behaves as\n",
      "a callable function: it takes in keyword arguments corresponding to the signature input fields (e.g.,\n",
      "question), formats a prompt to implement the signature and includes the appropriate demonstra-\n",
      "tions, calls the LM, and parses the output fields. When Predict detects it’s being used in compile\n",
      "mode, it will also internally track input/output traces to assist the teleprompter at bootstrapping the\n",
      "demonstrations.\n",
      "Other Built-in ModulesDSPy modules translate prompting techniques into modular functions that\n",
      "support any signature, contrasting with the standard approach of prompting LMs with task-specific\n",
      "details (e.g., hand-written few-shot examples). To this end, DSPy includes a number of more sophis-\n",
      "ticated modules like ChainOfThought, ProgramOfThought, MultiChainComparison, and ReAct.5\n",
      "These can all be used interchangeably to implement a DSPy signature. For instance, simply chang-\n",
      "4String descriptions of the task and the fields are also optional and usually omitted. Fields can carry optional\n",
      "field prefix and description. By default, fields are assumed to hold free-form strings; we are actively exploring\n",
      "optional data type as a way to specify constraints on valid values (e.g.,boolor int) and more gracefully handle\n",
      "formatting and parsing logic, though this feature is not core to DSPy at the time of writing.\n",
      "5These modules generalize prompting techniques from the literature, respectively, by Wei et al. (2022),\n",
      "Chen et al. (2022), Yoran et al. (2023), and Yao et al. (2022) and, in doing so, generalize the ideas on zero-shot\n",
      "prompting and rationale self-generation from Kojima et al. (2022), Zelikman et al. (2022), Zhang et al. (2022),\n",
      "and Huang et al. (2022) to parameterized modules that can bootstrap arbitrary multi-stage pipelines.\n",
      "4\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "3.1 N ATURAL LANGUAGE SIGNATURES CAN ABSTRACT PROMPTING & FINETUNING\n",
      "Instead of free-form string prompts, DSPy programs use natural language signatures to assign work\n",
      "to the LM. A DSPy signature isnatural-language typed declaration of a function: a short declarative\n",
      "spec that tells DSPy what a text transformation needs to do (e.g., “consume questions and return\n",
      "answers”), rather than how a specific LM should be prompted to implement that behavior. More\n",
      "formally, a DSPy signature is a tuple of input fields and output fields (and an optional instruction).\n",
      "A field consists offield name and optional metadata.4 In typical usage, the roles of fields are inferred\n",
      "by DSPy as a function of field names. For instance, the DSPy compiler will use in-context learning\n",
      "to interpret questiondifferently from answer and will iteratively refine its usage of these fields.\n",
      "Signatures offer two benefits over prompts: they can be compiled into self-improving and pipeline-\n",
      "adaptive prompts or finetunes. This is primarily done by bootstrapping (Sec 4) useful demonstrating\n",
      "examples for each signature. Additionally, they handle structured formatting and parsing logic to\n",
      "reduce (or, ideally, avoid) brittle string manipulation in user programs.\n",
      "In practice, DSPy signatures can be expressed with a shorthand notation likequestion -> answer,\n",
      "so that line 1 in the following is a complete DSPy program for a basic question-answering system\n",
      "(with line 2 illustrating usage and line 3 the response when GPT-3.5 is the LM):\n",
      "1 qa = dspy.Predict(\"question -> answer\")\n",
      "2 qa(question=\"Where is Guaran ´ı spoken?\")\n",
      "3 # Out: Prediction(answer=’Guaran ´ı is spoken mainly in South America.’)\n",
      "In the shorthand notation, each field’s name indicates the semantic role that the input (or output)\n",
      "field plays in the transformation. DSPy will parse this notation and expand the field names into\n",
      "meaningful instructions for the LM, so that english document -> french translation would\n",
      "prompt for English to French translation. When needed, DSPy offers more advanced programming\n",
      "interfaces for expressing more explicit constraints on signatures (Appendix A).\n",
      "3.2 P ARAMETERIZED & TEMPLATED MODULES CAN ABSTRACT PROMPTING TECHNIQUES\n",
      "Akin to type signatures in programming languages, DSPy signatures simply define an interface and\n",
      "provide type-like hints on the expected behavior. To use a signature, we must declare amodule with\n",
      "that signature, like we instantiated a Predict module above. A module declaration like this returns\n",
      "a function having that signature.\n",
      "The Predict Module The core module for working with signatures in DSPy isPredict(simplified\n",
      "pseudocode in Appendix D.1). Internally, Predict stores the supplied signature, an optional LM to\n",
      "use (initially None, but otherwise overrides the default LM for this module), and a list of demon-\n",
      "strations for prompting (initially empty). Like layers in PyTorch, the instantiated module behaves as\n",
      "a callable function: it takes in keyword arguments corresponding to the signature input fields (e.g.,\n",
      "question), formats a prompt to implement the signature and includes the appropriate demonstra-\n",
      "tions, calls the LM, and parses the output fields. When Predict detects it’s being used in compile\n",
      "mode, it will also internally track input/output traces to assist the teleprompter at bootstrapping the\n",
      "demonstrations.\n",
      "Other Built-in ModulesDSPy modules translate prompting techniques into modular functions that\n",
      "support any signature, contrasting with the standard approach of prompting LMs with task-specific\n",
      "details (e.g., hand-written few-shot examples). To this end, DSPy includes a number of more sophis-\n",
      "ticated modules like ChainOfThought, ProgramOfThought, MultiChainComparison, and ReAct.5\n",
      "These can all be used interchangeably to implement a DSPy signature. For instance, simply chang-\n",
      "4String descriptions of the task and the fields are also optional and usually omitted. Fields can carry optional\n",
      "field prefix and description. By default, fields are assumed to hold free-form strings; we are actively exploring\n",
      "optional data type as a way to specify constraints on valid values (e.g.,boolor int) and more gracefully handle\n",
      "formatting and parsing logic, though this feature is not core to DSPy at the time of writing.\n",
      "5These modules generalize prompting techniques from the literature, respectively, by Wei et al. (2022),\n",
      "Chen et al. (2022), Yoran et al. (2023), and Yao et al. (2022) and, in doing so, generalize the ideas on zero-shot\n",
      "prompting and rationale self-generation from Kojima et al. (2022), Zelikman et al. (2022), Zhang et al. (2022),\n",
      "and Huang et al. (2022) to parameterized modules that can bootstrap arbitrary multi-stage pipelines.\n",
      "4\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "calls in existing LM pipelines and in popular developer frameworks are generally implemented using\n",
      "hard-coded ‘prompt templates’, that is, long strings of instructions and demonstrations that are hand\n",
      "crafted through manual trial and error. We argue that this approach, while pervasive, can be brittle\n",
      "and unscalable—conceptually akin to hand-tuning the weights for a classifier. A given string prompt\n",
      "might not generalize to different pipelines or across different LMs, data domains, or even inputs.\n",
      "Toward a more systematic approach to designing AI pipelines, we introduce theDSPy programming\n",
      "model.1 DSPy pushes building new LM pipelines away from manipulating free-form strings and\n",
      "closer to programming (composing modular operators to build text transformation graphs) where a\n",
      "compiler automatically generates optimized LM invocation strategies and prompts from a program.\n",
      "We draw inspiration from the consensus that emerged around neural network abstractions (Bergstra\n",
      "et al., 2013), where (1) many general-purpose layers can be modularly composed in any complex\n",
      "architecture and (2) the model weights can be trained using optimizers instead of being hand-tuned.\n",
      "To this end, we propose the DSPy programming model(Sec 3). We first translate string-based\n",
      "prompting techniques, including complex and task-dependent ones like Chain of Thought (Wei et al.,\n",
      "2022) and ReAct (Yao et al., 2022), into declarative modules that carrynatural-language typed sig-\n",
      "natures. DSPy modules are task-adaptive components—akin to neural network layers—that abstract\n",
      "any particular text transformation, like answering a question or summarizing a paper. We then pa-\n",
      "rameterize each module so that it can learn its desired behavior by iteratively bootstrapping useful\n",
      "demonstrations within the pipeline. Inspired directly by PyTorch abstractions (Paszke et al., 2019),\n",
      "DSPy modules are used via expressive define-by-run computational graphs. Pipelines are expressed\n",
      "by (1) declaring the modules needed and (2) using these modules in any logical control flow (e.g.,\n",
      "ifstatements, for loops, exceptions, etc.) to logically connect the modules.\n",
      "We then develop theDSPy compiler(Sec 4), which optimizes any DSPy program to improve quality\n",
      "or cost. The compiler inputs are the program, a few training inputs with optional labels, and a valida-\n",
      "tion metric. The compiler simulates versions of the program on the inputs and bootstraps example\n",
      "traces of each module for self-improvement, using them to construct effective few-shot prompts\n",
      "or finetuning small LMs for steps of the pipeline. Optimization in DSPy is highly modular: it is\n",
      "conducted by teleprompters,2 which are general-purpose optimization strategies that determine how\n",
      "the modules should learn from data. In this way, the compiler automatically maps the declarative\n",
      "modules to high-quality compositions of prompting, finetuning, reasoning, and augmentation.\n",
      "Programming models like DSPy could be assessed along many dimensions, but we focus on the role\n",
      "of expert-crafted prompts in shaping system performance. We are seeking to reduce or even remove\n",
      "their role through DSPy modules (e.g., versions of popular techniques like Chain of Thought) and\n",
      "teleprompters. We report on two expansive case studies: math word problems (GMS8K; Cobbe et al.\n",
      "2021) and multi-hop question answering (HotPotQA; Yang et al. 2018) with explorations of chain\n",
      "of thought, multi-chain reflection, multi-hop retrieval, retrieval-augmented question answering, and\n",
      "agent loops. Our evaluations use a number of different compiling strategies effectively and show\n",
      "that straightforward DSPy programs outperform systems using hand-crafted prompts, while also\n",
      "allowing our programs to use much smaller and hence more efficient LMs effectively.\n",
      "Overall, this work proposes the first programming model that translates prompting techniques into\n",
      "parameterized declarative modules and introduces an effective compiler with general optimiza-\n",
      "tion strategies (teleprompters) to optimize arbitrary pipelines of these modules. Our main contri-\n",
      "butions are empirical and algorithmic: with DSPy, we have found that we can implement very\n",
      "short programs that can bootstrap self-improving multi-stage NLP systems using LMs as small as\n",
      "llama2-13b-chat and T5-Large (770M parameters). Without hand-crafted prompts and within\n",
      "minutes to tens of minutes of compiling, compositions of DSPy modules can raise the quality of\n",
      "simple programs from 33% to 82% (Sec 6) and from 32% to 46% (Sec 7) for GPT-3.5 and, simi-\n",
      "larly, from 9% to 47% (Sec 6) and from 22% to 41% (Sec 7) for llama2-13b-chat.\n",
      "1DSPy is pronounced dee-ess-pie. It’s the second iteration of our earlier Demonstrate–Search–Predict\n",
      "framework (DSP; Khattab et al. 2022). This paper introduces the key concepts in DSPy. For more extensive and\n",
      "up-to-date documentation of the framework, we refer readers to https://github.com/stanfordnlp/dspy.\n",
      "2We derive the name tele-prompters from the notion of abstracting and automating the task of prompting,\n",
      "in particular, such that it happens at a distance, without manual intervention.\n",
      "2\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "calls in existing LM pipelines and in popular developer frameworks are generally implemented using\n",
      "hard-coded ‘prompt templates’, that is, long strings of instructions and demonstrations that are hand\n",
      "crafted through manual trial and error. We argue that this approach, while pervasive, can be brittle\n",
      "and unscalable—conceptually akin to hand-tuning the weights for a classifier. A given string prompt\n",
      "might not generalize to different pipelines or across different LMs, data domains, or even inputs.\n",
      "Toward a more systematic approach to designing AI pipelines, we introduce theDSPy programming\n",
      "model.1 DSPy pushes building new LM pipelines away from manipulating free-form strings and\n",
      "closer to programming (composing modular operators to build text transformation graphs) where a\n",
      "compiler automatically generates optimized LM invocation strategies and prompts from a program.\n",
      "We draw inspiration from the consensus that emerged around neural network abstractions (Bergstra\n",
      "et al., 2013), where (1) many general-purpose layers can be modularly composed in any complex\n",
      "architecture and (2) the model weights can be trained using optimizers instead of being hand-tuned.\n",
      "To this end, we propose the DSPy programming model(Sec 3). We first translate string-based\n",
      "prompting techniques, including complex and task-dependent ones like Chain of Thought (Wei et al.,\n",
      "2022) and ReAct (Yao et al., 2022), into declarative modules that carrynatural-language typed sig-\n",
      "natures. DSPy modules are task-adaptive components—akin to neural network layers—that abstract\n",
      "any particular text transformation, like answering a question or summarizing a paper. We then pa-\n",
      "rameterize each module so that it can learn its desired behavior by iteratively bootstrapping useful\n",
      "demonstrations within the pipeline. Inspired directly by PyTorch abstractions (Paszke et al., 2019),\n",
      "DSPy modules are used via expressive define-by-run computational graphs. Pipelines are expressed\n",
      "by (1) declaring the modules needed and (2) using these modules in any logical control flow (e.g.,\n",
      "ifstatements, for loops, exceptions, etc.) to logically connect the modules.\n",
      "We then develop theDSPy compiler(Sec 4), which optimizes any DSPy program to improve quality\n",
      "or cost. The compiler inputs are the program, a few training inputs with optional labels, and a valida-\n",
      "tion metric. The compiler simulates versions of the program on the inputs and bootstraps example\n",
      "traces of each module for self-improvement, using them to construct effective few-shot prompts\n",
      "or finetuning small LMs for steps of the pipeline. Optimization in DSPy is highly modular: it is\n",
      "conducted by teleprompters,2 which are general-purpose optimization strategies that determine how\n",
      "the modules should learn from data. In this way, the compiler automatically maps the declarative\n",
      "modules to high-quality compositions of prompting, finetuning, reasoning, and augmentation.\n",
      "Programming models like DSPy could be assessed along many dimensions, but we focus on the role\n",
      "of expert-crafted prompts in shaping system performance. We are seeking to reduce or even remove\n",
      "their role through DSPy modules (e.g., versions of popular techniques like Chain of Thought) and\n",
      "teleprompters. We report on two expansive case studies: math word problems (GMS8K; Cobbe et al.\n",
      "2021) and multi-hop question answering (HotPotQA; Yang et al. 2018) with explorations of chain\n",
      "of thought, multi-chain reflection, multi-hop retrieval, retrieval-augmented question answering, and\n",
      "agent loops. Our evaluations use a number of different compiling strategies effectively and show\n",
      "that straightforward DSPy programs outperform systems using hand-crafted prompts, while also\n",
      "allowing our programs to use much smaller and hence more efficient LMs effectively.\n",
      "Overall, this work proposes the first programming model that translates prompting techniques into\n",
      "parameterized declarative modules and introduces an effective compiler with general optimiza-\n",
      "tion strategies (teleprompters) to optimize arbitrary pipelines of these modules. Our main contri-\n",
      "butions are empirical and algorithmic: with DSPy, we have found that we can implement very\n",
      "short programs that can bootstrap self-improving multi-stage NLP systems using LMs as small as\n",
      "llama2-13b-chat and T5-Large (770M parameters). Without hand-crafted prompts and within\n",
      "minutes to tens of minutes of compiling, compositions of DSPy modules can raise the quality of\n",
      "simple programs from 33% to 82% (Sec 6) and from 32% to 46% (Sec 7) for GPT-3.5 and, simi-\n",
      "larly, from 9% to 47% (Sec 6) and from 22% to 41% (Sec 7) for llama2-13b-chat.\n",
      "1DSPy is pronounced dee-ess-pie. It’s the second iteration of our earlier Demonstrate–Search–Predict\n",
      "framework (DSP; Khattab et al. 2022). This paper introduces the key concepts in DSPy. For more extensive and\n",
      "up-to-date documentation of the framework, we refer readers to https://github.com/stanfordnlp/dspy.\n",
      "2We derive the name tele-prompters from the notion of abstracting and automating the task of prompting,\n",
      "in particular, such that it happens at a distance, without manual intervention.\n",
      "2\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "ing Predict to ChainOfThought in the above program leads to a system that thinks step by step\n",
      "before committing to its output field.\n",
      "Importantly, all of these modules are implemented in a few lines of code by expanding the user-\n",
      "defined signature and calling Predict one or more times on new signatures as appropriate. For\n",
      "instance, we show a simplified implementation of the built-in ChainOfThought below.\n",
      "1 class ChainOfThought(dspy.Module):\n",
      "2 def __init__(self, signature):\n",
      "3 # Modify signature from ‘*inputs -> *outputs‘ to ‘*inputs -> rationale, *outputs‘.\n",
      "4 rationale_field = dspy.OutputField(prefix=\"Reasoning: Let’s think step by step.\")\n",
      "5 signature = dspy.Signature(signature).prepend_output_field(rationale_field)\n",
      "6\n",
      "7 # Declare a sub-module with the modified signature.\n",
      "8 self.predict = dspy.Predict(signature)\n",
      "9\n",
      "10 def forward(self, **kwargs):\n",
      "11 # Just forward the inputs to the sub-module.\n",
      "12 return self.predict(**kwargs)\n",
      "This is a fully-fledged module capable of learning effective few-shot prompting for any LM or task.\n",
      "We contrast that with Appendix C, which copies long reasoning prompts hand-written by sources\n",
      "ranging from recent research to popular prompting libraries.\n",
      "Parameterization Uniquely, DSPy parameterizes these prompting techniques. To understand this\n",
      "parameterization, observe that any LM call seeking to implement a particular signature needs to\n",
      "specify parameters that include: (1) the specific LM to call (Chen et al., 2023), (2) the prompt in-\n",
      "structions (Yang et al., 2023) and the string prefix of each signature field and, most importantly, (3)\n",
      "the demonstrations used as few-shot prompts (for frozen LMs) or as training data (for finetuning).\n",
      "We focus primarily on automatically generating and selecting useful demonstrations. In our case\n",
      "studies, we find that bootstrapping good demonstrations gives us a powerful way to teach sophisti-\n",
      "cated pipelines of LMs new behaviors systematically.\n",
      "Tools DSPy programs may use tools, which are modules that execute computation. We support re-\n",
      "trieval models through a dspy.Retrievemodule. At the time of writing, DSPy has built-in support\n",
      "for ColBERTv2, Pyserini, and Pinecone retrievers, and we have explored experimental dspy.SQL\n",
      "for executing SQL queries and dspy.PythonInterpreterfor executing Python code in a sandbox.\n",
      "Programs DSPy modules can be composed in arbitrary pipelines in a define-by-run interface. In-\n",
      "spired directly by PyTorch and Chainer, one first declares the modules needed at initialization, allow-\n",
      "ing DSPy to keep track of them for optimization, and then one expresses the pipeline with arbitrary\n",
      "code that calls the modules in a forward method. As a simple illustration, we offer the following\n",
      "simple but complete retrieval-augmented generation (RAG) system.\n",
      "1 class RAG(dspy.Module):\n",
      "2 def __init__(self, num_passages=3):\n",
      "3 # ‘Retrieve‘ will use the user’s default retrieval settings unless overriden.\n",
      "4 self.retrieve = dspy.Retrieve(k=num_passages)\n",
      "5 # ‘ChainOfThought‘ with signature that generates answers given retrieval & question.\n",
      "6 self.generate_answer = dspy.ChainOfThought(\"context, question -> answer\")\n",
      "7\n",
      "8 def forward(self, question):\n",
      "9 context = self.retrieve(question).passages\n",
      "10 return self.generate_answer(context=context, question=question)\n",
      "To highlight modularity, we use ChainOfThought as a drop-in replacement of the basic Predict.\n",
      "One can now simply write RAG()(\"Where is Guaran ´ı spoken?\") to use it. Notice that, if we\n",
      "use a signature \"context, question -> search query\", we get a system that generates search\n",
      "queries rather than answers.\n",
      "3.3 T ELEPROMPTERS CAN AUTOMATE PROMPTING FOR ARBITRARY PIPELINES\n",
      "When compiling a DSPy program, we generally invoke a teleprompter, which is an optimizer that\n",
      "takes the program, a training set, and a metric—and returns a new optimized program. Different\n",
      "teleprompters (Sec 4) apply different strategies for optimization.\n",
      "5\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "ing Predict to ChainOfThought in the above program leads to a system that thinks step by step\n",
      "before committing to its output field.\n",
      "Importantly, all of these modules are implemented in a few lines of code by expanding the user-\n",
      "defined signature and calling Predict one or more times on new signatures as appropriate. For\n",
      "instance, we show a simplified implementation of the built-in ChainOfThought below.\n",
      "1 class ChainOfThought(dspy.Module):\n",
      "2 def __init__(self, signature):\n",
      "3 # Modify signature from ‘*inputs -> *outputs‘ to ‘*inputs -> rationale, *outputs‘.\n",
      "4 rationale_field = dspy.OutputField(prefix=\"Reasoning: Let’s think step by step.\")\n",
      "5 signature = dspy.Signature(signature).prepend_output_field(rationale_field)\n",
      "6\n",
      "7 # Declare a sub-module with the modified signature.\n",
      "8 self.predict = dspy.Predict(signature)\n",
      "9\n",
      "10 def forward(self, **kwargs):\n",
      "11 # Just forward the inputs to the sub-module.\n",
      "12 return self.predict(**kwargs)\n",
      "This is a fully-fledged module capable of learning effective few-shot prompting for any LM or task.\n",
      "We contrast that with Appendix C, which copies long reasoning prompts hand-written by sources\n",
      "ranging from recent research to popular prompting libraries.\n",
      "Parameterization Uniquely, DSPy parameterizes these prompting techniques. To understand this\n",
      "parameterization, observe that any LM call seeking to implement a particular signature needs to\n",
      "specify parameters that include: (1) the specific LM to call (Chen et al., 2023), (2) the prompt in-\n",
      "structions (Yang et al., 2023) and the string prefix of each signature field and, most importantly, (3)\n",
      "the demonstrations used as few-shot prompts (for frozen LMs) or as training data (for finetuning).\n",
      "We focus primarily on automatically generating and selecting useful demonstrations. In our case\n",
      "studies, we find that bootstrapping good demonstrations gives us a powerful way to teach sophisti-\n",
      "cated pipelines of LMs new behaviors systematically.\n",
      "Tools DSPy programs may use tools, which are modules that execute computation. We support re-\n",
      "trieval models through a dspy.Retrievemodule. At the time of writing, DSPy has built-in support\n",
      "for ColBERTv2, Pyserini, and Pinecone retrievers, and we have explored experimental dspy.SQL\n",
      "for executing SQL queries and dspy.PythonInterpreterfor executing Python code in a sandbox.\n",
      "Programs DSPy modules can be composed in arbitrary pipelines in a define-by-run interface. In-\n",
      "spired directly by PyTorch and Chainer, one first declares the modules needed at initialization, allow-\n",
      "ing DSPy to keep track of them for optimization, and then one expresses the pipeline with arbitrary\n",
      "code that calls the modules in a forward method. As a simple illustration, we offer the following\n",
      "simple but complete retrieval-augmented generation (RAG) system.\n",
      "1 class RAG(dspy.Module):\n",
      "2 def __init__(self, num_passages=3):\n",
      "3 # ‘Retrieve‘ will use the user’s default retrieval settings unless overriden.\n",
      "4 self.retrieve = dspy.Retrieve(k=num_passages)\n",
      "5 # ‘ChainOfThought‘ with signature that generates answers given retrieval & question.\n",
      "6 self.generate_answer = dspy.ChainOfThought(\"context, question -> answer\")\n",
      "7\n",
      "8 def forward(self, question):\n",
      "9 context = self.retrieve(question).passages\n",
      "10 return self.generate_answer(context=context, question=question)\n",
      "To highlight modularity, we use ChainOfThought as a drop-in replacement of the basic Predict.\n",
      "One can now simply write RAG()(\"Where is Guaran ´ı spoken?\") to use it. Notice that, if we\n",
      "use a signature \"context, question -> search query\", we get a system that generates search\n",
      "queries rather than answers.\n",
      "3.3 T ELEPROMPTERS CAN AUTOMATE PROMPTING FOR ARBITRARY PIPELINES\n",
      "When compiling a DSPy program, we generally invoke a teleprompter, which is an optimizer that\n",
      "takes the program, a training set, and a metric—and returns a new optimized program. Different\n",
      "teleprompters (Sec 4) apply different strategies for optimization.\n",
      "5\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "D M ODULES\n",
      "D.1 P REDICT\n",
      "1 class Predict(dspy.Module):\n",
      "2 def __init__(self, signature, **config):\n",
      "3 self.signature = dspy.Signature(signature)\n",
      "4 self.config = config\n",
      "5\n",
      "6 # Module Parameters.\n",
      "7 self.lm = dspy.ParameterLM(None) # use the default LM\n",
      "8 self.demonstrations = dspy.ParameterDemonstrations([])\n",
      "9\n",
      "10 def forward(self, **kwargs):\n",
      "11 lm = get_the_right_lm(self.lm, kwargs)\n",
      "12 signature = get_the_right_signature(self.signature, kwargs)\n",
      "13 demonstrations = get_the_right_demonstrations(self.demonstrations, kwargs)\n",
      "14\n",
      "15 prompt = signature(demos=self.demos, **kwargs)\n",
      "16 completions = lm.generate(prompt, **self.config)\n",
      "17 prediction = Prediction.from_completions(completions, signature=signature)\n",
      "18\n",
      "19 if dsp.settings.compiling is not None:\n",
      "20 trace = dict(predictor=self, inputs=kwargs, outputs=prediction)\n",
      "21 dspy.settings.traces.append(trace)\n",
      "22\n",
      "23 return prediction\n",
      "D.2 C HAIN OF THOUGHT\n",
      "1 class ChainOfThought(dspy.Module):\n",
      "2 def __init__(self, signature):\n",
      "3\n",
      "4 # Modify signature from ‘*inputs -> *outputs‘ to ‘*inputs -> rationale, *outputs‘.\n",
      "5 rationale_field = dspy.OutputField(prefix=\"Reasoning: Let’s think step by step.\")\n",
      "6 signature = dspy.Signature(signature).prepend_output_field(rationale_field)\n",
      "7\n",
      "8 # Declare a sub-module with the modified signature.\n",
      "9 self.predict = dspy.Predict(self.signature)\n",
      "10\n",
      "11 def forward(self, **kwargs):\n",
      "12 # Just forward the inputs to the sub-module.\n",
      "13 return self.predict(**kwargs)\n",
      "27\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "D M ODULES\n",
      "D.1 P REDICT\n",
      "1 class Predict(dspy.Module):\n",
      "2 def __init__(self, signature, **config):\n",
      "3 self.signature = dspy.Signature(signature)\n",
      "4 self.config = config\n",
      "5\n",
      "6 # Module Parameters.\n",
      "7 self.lm = dspy.ParameterLM(None) # use the default LM\n",
      "8 self.demonstrations = dspy.ParameterDemonstrations([])\n",
      "9\n",
      "10 def forward(self, **kwargs):\n",
      "11 lm = get_the_right_lm(self.lm, kwargs)\n",
      "12 signature = get_the_right_signature(self.signature, kwargs)\n",
      "13 demonstrations = get_the_right_demonstrations(self.demonstrations, kwargs)\n",
      "14\n",
      "15 prompt = signature(demos=self.demos, **kwargs)\n",
      "16 completions = lm.generate(prompt, **self.config)\n",
      "17 prediction = Prediction.from_completions(completions, signature=signature)\n",
      "18\n",
      "19 if dsp.settings.compiling is not None:\n",
      "20 trace = dict(predictor=self, inputs=kwargs, outputs=prediction)\n",
      "21 dspy.settings.traces.append(trace)\n",
      "22\n",
      "23 return prediction\n",
      "D.2 C HAIN OF THOUGHT\n",
      "1 class ChainOfThought(dspy.Module):\n",
      "2 def __init__(self, signature):\n",
      "3\n",
      "4 # Modify signature from ‘*inputs -> *outputs‘ to ‘*inputs -> rationale, *outputs‘.\n",
      "5 rationale_field = dspy.OutputField(prefix=\"Reasoning: Let’s think step by step.\")\n",
      "6 signature = dspy.Signature(signature).prepend_output_field(rationale_field)\n",
      "7\n",
      "8 # Declare a sub-module with the modified signature.\n",
      "9 self.predict = dspy.Predict(self.signature)\n",
      "10\n",
      "11 def forward(self, **kwargs):\n",
      "12 # Just forward the inputs to the sub-module.\n",
      "13 return self.predict(**kwargs)\n",
      "27\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "7 C ASE STUDY: C OMPLEX QUESTION ANSWERING\n",
      "In this case study, we explore the multi-hop question answering task with the HotPotQA (Yang et al.,\n",
      "2018) dataset in the open-domain “fullwiki” setting. For retrieval, we use a search index of the of-\n",
      "ficial Wikipedia 2017 “abstracts” dump of HotPotQA. Search is conducted by a ColBERTv2 (San-\n",
      "thanam et al., 2021) retriever. The HotPotQA test set is hidden, so we reserve the official validation\n",
      "set for our testing, and sample 1000 examples for that. We sub-divide the training set into 70%/30%\n",
      "train/validation splits. In the training (and thus validation) split, we keep only examples marked as\n",
      "“hard” in the original dataset, which matches the designation of the official validation and test sets.\n",
      "For training and for reporting development results, we sample 200 and 300 examples respectively.\n",
      "Programs Considered Our simplest baseline is the vanilla program used in the previous case\n",
      "study on GSM8K (Sec 6); the \"question -> answer\" signature is universal enough that it will\n",
      "work for this task (and many others) when compiled appropriately.\n",
      "Our baseline RAG program is the one given in Section 3.2 as a simple example of RAG with a\n",
      "dspy.ChainOfThought layer. We will see that this program does not excel at HotPotQA, and this\n",
      "motivates us to evaluate two multi-hop programs.\n",
      "To that end, we first test ReAct (Yao et al., 2022), a multi-step agent for tool use, which is imple-\n",
      "mented as a built-in module in DSPy. In the simplest case, a ReAct module for a particular signature\n",
      "can be declared as follows in DSPy:\n",
      "1 react = dspy.ReAct(\"question -> answer\", tools=[dspy.Retrieve(k=1)], max_iters=5)\n",
      "We also test the following custom program, which simulates the information flow in Baleen (Khattab\n",
      "et al., 2021a) and IRRR (Qi et al., 2020) and has similarities to IRCoT (Trivedi et al., 2022).\n",
      "1 class BasicMultiHop(dspy.Module):\n",
      "2 def __init__(self, passages_per_hop):\n",
      "3 self.retrieve = dspy.Retrieve(k=passages_per_hop)\n",
      "4 self.generate_query = dspy.ChainOfThought(\"context, question -> search_query\")\n",
      "5 self.generate_answer = dspy.ChainOfThought(\"context, question -> answer\")\n",
      "6\n",
      "7 def forward(self, question):\n",
      "8 context = []\n",
      "9\n",
      "10 for hop in range(2):\n",
      "11 query = self.generate_query(context=context, question=question).search_query\n",
      "12 context += self.retrieve(query).passages\n",
      "13\n",
      "14 return self.generate_answer(context=context, question=question)\n",
      "15\n",
      "16 multihop = BasicMultiHop(passages_per_hop=3)\n",
      "Compiling For compilers, we continue to use the ones that we used for GSM8K (see Sec 6). We\n",
      "also consider two compositions of our teleprompters. For ReAct, we consider bootstrapping with\n",
      "BootstrapFewShotWithRandomSearch starting from an earlier bootstrap of the ReAct program.\n",
      "For the simple multihop program, we also consider fine-tuning with T5-Large starting from the\n",
      "earlier bootstrap of that program.\n",
      "1 multihop_t5 = dspy.BootstrapFinetune(metric=answer_exact_match).compile(program,\n",
      "teacher=bootstrap, trainset=trainset, target=’t5-large’)\n",
      "Results Table 2 summarizes our results. Compared with the vanillafew-shot prompting, a chain-\n",
      "of-thought and retrieval-augmented generation ( CoT RAG) program can self-bootstrap in DSPy to\n",
      "increase answer EM substantially. However, this relies entirely on the ColBERTv2 retriever to find\n",
      "relevant passages directly from the original questions, limiting its passage recall. This is tackled in\n",
      "the reactand multihopprograms, which will generate queries for the retriever in multiple iterative\n",
      "“hops”. Indeed, overall, a simple multihop program performs the best, and in general bootstrap\n",
      "again proves to be very effective at raising its quality relative to itsfewshot variant for both LMs.\n",
      "In particular, we can see that bootstrap (and/or bootstrap×2) can outperform both fewshot\n",
      "prompting (for multihop) and expert human reasoning (for react; adapted slightly from Yao et al.\n",
      "(2022) to our retrieval setting). Perhaps most importantly, we can makellama2-13b-chatcompet-\n",
      "itive with GPT-3.5 by simply compiling our programs.\n",
      "To assess the finetuning capacity of DSPy, we also evaluated the compiler multihop t5 defined\n",
      "above which produces a T5-Large (770M parameter) model. This program scores 39.3% answer\n",
      "EM and 46.0% passage accuracy on the dev set, using only 200 labeled inputs and 800 unlabeled\n",
      "10\n",
      "\n",
      "---\n",
      "\n",
      "Preprint\n",
      "7 C ASE STUDY: C OMPLEX QUESTION ANSWERING\n",
      "In this case study, we explore the multi-hop question answering task with the HotPotQA (Yang et al.,\n",
      "2018) dataset in the open-domain “fullwiki” setting. For retrieval, we use a search index of the of-\n",
      "ficial Wikipedia 2017 “abstracts” dump of HotPotQA. Search is conducted by a ColBERTv2 (San-\n",
      "thanam et al., 2021) retriever. The HotPotQA test set is hidden, so we reserve the official validation\n",
      "set for our testing, and sample 1000 examples for that. We sub-divide the training set into 70%/30%\n",
      "train/validation splits. In the training (and thus validation) split, we keep only examples marked as\n",
      "“hard” in the original dataset, which matches the designation of the official validation and test sets.\n",
      "For training and for reporting development results, we sample 200 and 300 examples respectively.\n",
      "Programs Considered Our simplest baseline is the vanilla program used in the previous case\n",
      "study on GSM8K (Sec 6); the \"question -> answer\" signature is universal enough that it will\n",
      "work for this task (and many others) when compiled appropriately.\n",
      "Our baseline RAG program is the one given in Section 3.2 as a simple example of RAG with a\n",
      "dspy.ChainOfThought layer. We will see that this program does not excel at HotPotQA, and this\n",
      "motivates us to evaluate two multi-hop programs.\n",
      "To that end, we first test ReAct (Yao et al., 2022), a multi-step agent for tool use, which is imple-\n",
      "mented as a built-in module in DSPy. In the simplest case, a ReAct module for a particular signature\n",
      "can be declared as follows in DSPy:\n",
      "1 react = dspy.ReAct(\"question -> answer\", tools=[dspy.Retrieve(k=1)], max_iters=5)\n",
      "We also test the following custom program, which simulates the information flow in Baleen (Khattab\n",
      "et al., 2021a) and IRRR (Qi et al., 2020) and has similarities to IRCoT (Trivedi et al., 2022).\n",
      "1 class BasicMultiHop(dspy.Module):\n",
      "2 def __init__(self, passages_per_hop):\n",
      "3 self.retrieve = dspy.Retrieve(k=passages_per_hop)\n",
      "4 self.generate_query = dspy.ChainOfThought(\"context, question -> search_query\")\n",
      "5 self.generate_answer = dspy.ChainOfThought(\"context, question -> answer\")\n",
      "6\n",
      "7 def forward(self, question):\n",
      "8 context = []\n",
      "9\n",
      "10 for hop in range(2):\n",
      "11 query = self.generate_query(context=context, question=question).search_query\n",
      "12 context += self.retrieve(query).passages\n",
      "13\n",
      "14 return self.generate_answer(context=context, question=question)\n",
      "15\n",
      "16 multihop = BasicMultiHop(passages_per_hop=3)\n",
      "Compiling For compilers, we continue to use the ones that we used for GSM8K (see Sec 6). We\n",
      "also consider two compositions of our teleprompters. For ReAct, we consider bootstrapping with\n",
      "BootstrapFewShotWithRandomSearch starting from an earlier bootstrap of the ReAct program.\n",
      "For the simple multihop program, we also consider fine-tuning with T5-Large starting from the\n",
      "earlier bootstrap of that program.\n",
      "1 multihop_t5 = dspy.BootstrapFinetune(metric=answer_exact_match).compile(program,\n",
      "teacher=bootstrap, trainset=trainset, target=’t5-large’)\n",
      "Results Table 2 summarizes our results. Compared with the vanillafew-shot prompting, a chain-\n",
      "of-thought and retrieval-augmented generation ( CoT RAG) program can self-bootstrap in DSPy to\n",
      "increase answer EM substantially. However, this relies entirely on the ColBERTv2 retriever to find\n",
      "relevant passages directly from the original questions, limiting its passage recall. This is tackled in\n",
      "the reactand multihopprograms, which will generate queries for the retriever in multiple iterative\n",
      "“hops”. Indeed, overall, a simple multihop program performs the best, and in general bootstrap\n",
      "again proves to be very effective at raising its quality relative to itsfewshot variant for both LMs.\n",
      "In particular, we can see that bootstrap (and/or bootstrap×2) can outperform both fewshot\n",
      "prompting (for multihop) and expert human reasoning (for react; adapted slightly from Yao et al.\n",
      "(2022) to our retrieval setting). Perhaps most importantly, we can makellama2-13b-chatcompet-\n",
      "itive with GPT-3.5 by simply compiling our programs.\n",
      "To assess the finetuning capacity of DSPy, we also evaluated the compiler multihop t5 defined\n",
      "above which produces a T5-Large (770M parameter) model. This program scores 39.3% answer\n",
      "EM and 46.0% passage accuracy on the dev set, using only 200 labeled inputs and 800 unlabeled\n",
      "10\n",
      "---------------------\n",
      "Given the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don't know the answer say 'I don't know!'.\n",
      "Query: What is DSPy?\n",
      "Answer: \n"
     ]
    }
   ],
   "source": [
    "print(messages[1].content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
